其他
满足临床研究中找P值有意义全部要求的R包-Publish(一)
Editor's Note
胖子老师总能搞点新花样
The following article is from 灵活胖子的科研进步之路 Author 灵活胖子1988
前言及说明
github地址:https://github.com/tagteam/Publish 函数教程参考网址:https://rdrr.io/cran/Publish/
这个package包括一系列方便的函数,将一些基本统计分析的结果转换为适合发布的表格格式与森林图。包括描述性表格、逻辑回归和Cox回归。写作的过程中会同时分享自己在做数据分析中确定P值阳性的心法。
体会
1.subgroupAnalysis函数可以自动进行亚组分析返回所有需要进一步分析的数据组成表格,后续也可以很方便的并进行自动绘图函数 2.acut函数可以方便的根据要求将连续变量进行离散化 3.coxphSeries函数与glmSeries函数可以对批量对一组变量进行批量调整协变量的cox与逻辑分析。 4.plot可以方便的绘制默认参数的森林图,plotConfidence函数也可以绘制高度定制的图片以满足顶刊的发表要求。
1. subgroupAnalysis函数及其绘图
阳性P值第一式:亚组的结果和全组人群有时候不同,特别是在调整一些协变量后,因为交互作用等原因往往会出现P值有意义的情况。
#load libraries
library(data.table)
library(Publish)
library(survival)
#获得测试数据
data(traceR)
data.table::setDT(traceR)
traceR[,':='(wmi2=factor(wallMotionIndex<0.9,levels=c(TRUE,FALSE),
labels=c("bad","good")),
abd2=factor(abdominalCircumference<95, levels=c(TRUE,FALSE),
labels=c("slim","fat")))]
traceR[,sex:=as.factor(sex)] # all subgroup variables needs to be factor
traceR[observationTime==0,observationTime:=1]
traceR=na.omit(traceR)
glimpse(traceR)
#拟合单因素cox回归方程
fit_cox <- coxph(Surv(observationTime,dead)~treatment,data=traceR)
#进行亚组分析
sub_cox <- subgroupAnalysis(object = fit_cox,#单因素回归方程
data = traceR,#数据集
treatment="treatment", #治疗因素(主要研究自变量)
subgroups=c("smoking","sex","wmi2","abd2")#需要进行亚组分析的变量
)
sub_cox
plot(sub_cox)
## 调整其他协变量的亚组分析及交互性作用P值计算
fit_cox_adj <- coxph(Surv(observationTime,dead)~treatment+smoking+sex+wmi2+abd2,
data=traceR)
sub_cox_adj <- subgroupAnalysis(fit_cox_adj,
traceR,
treatment="treatment",
subgroups=c("smoking","sex","wmi2","abd2"))
#泊松回归的亚组分析
fit_p <- glm(dead~treatment+age+offset(log(observationTime)),family="poisson",
data=traceR)
sub_pois <- subgroupAnalysis(fit_p,traceR,treatment="treatment",
subgroups=~smoking+sex+wmi2+abd2)
plot(sub_pois)
# 逻辑回归结果
fit_log <- glm(dead~treatment+age,family="binomial",data=traceR)
sub_log <- subgroupAnalysis(object = fit_log,
data = traceR,
treatment="treatment",
subgroups=~smoking+sex+wmi2+abd2,
factor.reference="inline")
sub_log
plot(sub_log)
2.acut函数将连续变量进行离散化
阳性P值第二式:有些变量的量级跨度很大,对其进行离散化后有可能会得到与原始数据很不一样的结果。
data(Diabetes) #载入测试数据
glimpse(Diabetes)
## 将连续变量进行分类话,默认为5分类(分位数划分)
chol.groups <- acut(Diabetes$chol)
table(chol.groups)
## 更改默认显示分类格式
chol.groups <- acut(Diabetes$chol,format="%l-%u",n=5)
table(chol.groups)
## 更改分类个数
chol.groups <- acut(Diabetes$chol,n=7)
table(chol.groups)
## 可以利用breaks参数设定间隔以划分连续变量
age.groups <- acut(Diabetes$age,format="%l-%u",
breaks=seq(0,100,by=10))
table(age.groups)
## 设定标准,将标准以上的分类划分为一类并命名
age.groups <- acut(Diabetes$age,
format="%l-%u",
format.low="below %u",
format.high="above %l",
breaks=c(0, seq(20,80,by=10), Inf))
table(age.groups)
# 利用org函数对结果进行标准化表示
BMI.groups <- acut(Diabetes$BMI,
format="BMI between %l and %u",
format.low="BMI below %u",
format.high="BMI above %l")
table(BMI.groups)
org(as.data.frame(table(BMI=BMI.groups)))
## 设定相同间隔而不是相同分数位为分组依据
BMI.grouping <-
seq(min(Diabetes$BMI,na.rm=TRUE), max(Diabetes$BMI,na.rm=TRUE), length.out=6)
BMI.grouping[1] <- -Inf #将下限设定位无穷小意包括所有结果
BMI.grouping
BMI.groups <- acut(Diabetes$BMI,
breaks=BMI.grouping,
format="BMI between %l and %u",
format.low="BMI below %u",
format.high="BMI above %l")
table(BMI.groups)
org(as.data.frame(table(BMI=BMI.groups)))
3.coxphSeries函数与glmSeries函数可以进行调整协变量的批量回归分析
阳性P值第三式:当有多个自变量和一个因变量(比如生存结果或二分类结果),我们可以批量进行单因素回归分析筛选阳性结果。有时候调整其他因素后进行多因素的结果与单因素结果会有明显不同,这时候我们可以批量调整协变量的多因素分析以确定阳性的自变量。
library(survival)
library(gt)
library(gtsummary)
#构建测试数据
data(pbc)
pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1"))
glimpse(pbc)
# 批量对c("edema","bili","protime")这三个因素进行单因素cox分析
uni.hr <- coxphSeries(Surv(time,status==2)~1,
vars=c("edema","bili","protime"),
data=pbc)
uni.hr
#gtsummary包进行验证、
tbl_uv_ex2 <-
tbl_uvregression(
pbc[,c("edema","bili","protime","time","status")],
method = coxph,
y = Surv(time,status==2),
exponentiate = TRUE,
pvalue_fun = function(x) style_pvalue(x, digits = 2)
)
tbl_uv_ex2
## 控制其他协变量后批量多因素分析结果、
## 调整的协变量为age与sex,待分析变量为"edema","bili","protime"
controlled.hr <- coxphSeries(Surv(time,status==2)~age+sex,
vars=c("edema","bili","protime"),
data=pbc)
controlled.hr
# glm广义估计方程进行逻辑回归的用法类似
data(Diabetes)
Diabetes$hyper1 <- factor(1*(Diabetes$bp.1s>140))
## collect odds ratios from three univariate logistic regression analyses
uni.odds <- glmSeries(hyper1~1,
vars=c("chol","hdl","location"),
data=Diabetes,family=binomial)
uni.odds
## control the logistic regression analyses for age and gender
## but collect only information on the variables in `vars'.
controlled.odds <- glmSeries(hyper1~age+gender,
vars=c("chol","hdl","location"),
data=Diabetes, family=binomial)
controlled.odds
4. plotConfidence函数可以绘制高度定制的森林图
#加载测试数据(数据格式类似于亚组分析)
data(CiTable)
##绘制森林图操作
## x参数为绘制森林图的函数(必须包括点估计及CI)
## lables参数为名称参数,代表对应行应该显示的名称
plotConfidence(x=CiTable[,c("HazardRatio","lower","upper","p")],
labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")])
## 将宽数据格式进行分组(将文字部分重新组合)
labellist <- split(CiTable[,c("Dose","Time","Mean","SD","n")],#待分组依据
CiTable[,"Drug"])#组别依据
labellist
## 将宽数据格式进行分组(将绘制森林图的数据部分重新组合)
CC= data.table::rbindlist(split(CiTable[,c("HazardRatio","lower","upper")],
CiTable[,"Drug"]))#分组因素必须与上面labellist一样
plotConfidence(x=CC, labels=labellist)
#这个图表最多包含三列:
## 第一列:标签
## 第二列:置信区间的文字内容
## 第三列:置信区间的图形表示(森林图)
## 注意:第三列始终会出现,用户可以决定是否还要出现第一列和第二列
## 这些列会通过布局函数进行排列,默认的顺序是1、3、2,这样置信区间的文字会出现在中间
## 三列的出现顺序可以按照以下方式进行更
#默认排序
plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")],
labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")],
order=c(1,3,2))
#将森立图放在第一列
plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")],
labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")],
order=c(2,3,1))
## 如果仅有两列,绘制森林图的数据将不会显示
plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")],
labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")],
values=FALSE,
order=c(2,1))
plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")],
labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")],
values=FALSE,
order=c(1,2))
## xratio可以控制每列大小的比例
## leftmargin=0.1,rightmargin=0.00这两个参数可以控制图到页边的距离
plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")],
labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")],
xratio=c(0.4,0.15),
leftmargin=0.1,rightmargin=0.5)
to be continued
关注下方公众号,分享更多更好玩的R语言知识。
点个在看,SCI马上发表。